


/*
 This do-file replicates the results from
 "The Physician-Patient Match and Health Inequality"
 by Ida Lykke Kristiansen and Sophie Yanying Sheng.

 Before running the Stata do-files, the SAS program "data_build" must be executed.
*/

/*
Setup: the do-file called below (First.do) sets the global paths
used by every later step.
NOTE(review): "..." looks like a placeholder for the local project
directory - replace it with the actual path before running.
*/

* Start from an empty dataset in memory.
clear
do "...\First.do"

********************************************************************************
* 1: Patient background information
********************************************************************************
/*
Takes all children (full population), finds their highest level of
completed education, and creates a dataset with information about gender, age,
and mother and father ids.

* INPUT: "$raw\Children1.dta", "$raw\Children3.dta" (Generated in SAS)
* FORMAT: "...\formats\hfaudd_level.dta"
* OUTPUT: "$work\Children1.dta", "$work\Children3.dta"
*/

do "$do\1a_children_udd.do"

/*
Finds cause of death.
* INPUT: "$raw\patient_dod_moreyears.dta", "$raw\patient_dod2_moreyears.dta" (Generated in SAS)
* OUTPUT: "$work\deaths.dta"
*/
do "$do\1b_mortality.do"

/*
Finds reimbursement to PCPs and GP contacts and services.
* INPUT: "$raw\gp_all_costs_gp.dta", "$raw\gp_all_costs_gp2.dta" (Generated in SAS)
* TEMP: "$work\costs1.dta"
* OUTPUT: "$work\costs2.dta"
*/
do "$do\1c_costs.do"

/*
Finds patient use of medicine.
* INPUT: "$raw\lmdb_moreyears.dta" (Generated in SAS)
* OUTPUT: "$work\lmdb_new_types.dta"
*/
do "$do\1e_medicine.do"

/*
Finds patient hospital admissions.
* INPUT: "$raw\patient_hospital_moreyears.dta", "$raw\diagnosis_moreyears.dta" (Generated in SAS)
* OUTPUT: "$work\hospital_admissions.dta"
*/
do "$do\1f_hospital.do"

/*
Finds patient GP services.
* INPUT: "$raw\gp_all1", "$raw\gp_all_costs_gp2.dta" (Generated in SAS)
* TEMP: "$work\gp_services1.dta"
* OUTPUT: "$work\gp_services.dta"
*/
do "$do\1g_GP_services.do"

/*
Finds information on lung scans at hospitals.
* INPUT: "$raw\lung_cancer_test2_moreyears.dta" (Generated in SAS)
* OUTPUT: "$work\lung_scans.dta"
*/
do "$do\1j_lungscans.do"




/*
Cleans and merges information about the full population of patients:
merges information about death, hospital admissions, and prescription drug use,
combines everything (including education), and saves the result as one file,
"patient_background".
Must run after the do-files above, because it reads the datasets they generate.
* INPUT: "$raw\patient_background_moreyears.dta" + the datasets generated in the do-files above
* OUTPUT: "$work\patient_background.dta"
*/
do "$do\1_patient_merge.do"



********************************************************************************
* 2. Doctor background information
********************************************************************************

/*
Creates a dataset with all doctors in Denmark and adds age and education.
* INPUT: "$raw\T_yder_1.dta", "$raw\t_yder_person.dta", "$raw\Children3.dta" (Generated in SAS)
* OUTPUT: "$work\doctor_udd.dta"
*/
do "$do\2a_doctor_education.do"


/*
Takes all parents from the dataset and finds the parents' highest level of completed education.
* INPUT: "$raw\mom_udd.dta", "$raw\patient_background_moreyears.dta", "$raw\dad_udd.dta" (Generated in SAS)
* FORMAT: "...\formats\hfaudd_level.dta"
* OUTPUT: "$work\mom_udd.dta", "$work\dad_udd.dta"
*/
do "$do\2b_parent_edu.do"


/*
Creates a ydernr x year x pnr dataset (identifies the personal identifier behind the doctor identifier).
* INPUT: "$raw\T_yder_1.dta", "$raw\t_yder_person.dta", "$work\doctor_udd.dta"
* TEMP: "$work\T_yder_1.dta", "$work\doctor_1518.dta", "$work\yder2.dta"
* OUTPUT: "$work\Yder_9518_wide.dta"
*/
do "$do\2c_ydernrpnr.do"

/*
Finds the GPs' high school grades.
* INPUT: "$raw\All_GRADES.dta", "$work\udd_kode.dta"
* OUTPUT: "$work\gp_grades.dta"
NOTE(review): none of the OUTPUT lists in this file mentions udd_kode.dta -
confirm which step creates it.
*/
do "$do\2d_gp_grades.do"


/*
Finds the GPs' ethnic background.
* INPUT: "$raw\patient_background_moreyears.dta"
* OUTPUT: "$work\ie_type.dta"
*/
do "$do\2f_ietype.do"


/*
Merges the PCP information produced by the steps above.
* INPUT: "$work\Yder_9518_wide.dta", "$work\children1.dta", "$work\children3.dta", "$work\ie_type.dta",  "$work\mom_udd.dta", "$work\dad_udd.dta", "$work\doctor_udd.dta", "$work\gp_grades.dta"
* OUTPUT: "$work\Yder_9518_long.dta" and "$work\gp_ses.dta"
*/
do "$do\2_GP_merge.do"



******************************************
* 2 (continued): steps that read "$work\Yder_9518_long.dta"
* and therefore run after the PCP merge
******************************************

/*
Finds PCP clinic closures.
* INPUT: "$raw\gp_services_all.dta", "$work\Yder_9518_long.dta"
* OUTPUT: "$work\gp_closures.dta", "$work\gp_closures_yearly.dta", "$work\gp_closures_year.dta"
*/
do "$do\2e_closures.do"

/*
Finds cases of parental illness among doctors. Used in a mechanism analysis
and merged on later (7d_parental_illness.do).
* INPUT: "$work\Yder_9518_long.dta", "$work\deaths.dta", "$work\lmdb_new_types.dta"
* TEMP: "$work\yder_mom_meds.dta", "$work\yder_dad_meds.dta"
* OUTPUT: "$work\ydernr_parent_illness.dta"
*/
do "$do\2g_parental_illness.do"

/*
Finds information on PCP parental income and correlates it with SES.
* INPUT: "$work\Yder_9518_long.dta", "$raw\income.dta"
* TEMP: "$work\mom_income1.dta", "$work\dad_income1.dta"
*/
do "$do\2h_parental_income.do"



********************************************************************************
* 3. Merge PCP and patient data
********************************************************************************

/*
Creates the patient x PCP match following the Kjaergaards algorithm.
* INPUT: "$raw\gp_all_new3.dta", "$work\gp_closures.dta"
* OUTPUT: "$work\patient_gp_new.dta", "$work\patient_gp_year.dta", "$work\N_patients.dta"
*/
do "$do\3a_doctor_patient_match.do"

/*
Merges patient background and GP background. This file creates the main analysis dataset.
* INPUT: "$work\patient_background.dta",  "$work\patient_gp_year.dta", "$work\gp_closures_year.dta", "$work\gp_ses.dta"
* OUTPUT:  "$work\full_sample.dta", "$work\analysis_sample.dta", "$work\analysis_sample_pnr.dta"
*/
do "$do\3_merge.do"

/*
Share of low-SES patients in a given PCP and year. Used in "7c_pcp_characteristics.do".
* INPUT: "$work\patient_gp_year.dta", "$work\full_sample.dta"
* OUTPUT: "$work\mean_ses_yder.dta"
*/
do "$do\3c_yder_pt_ses.do"


/*
Calculates the share of post-closure PCP SES. Used in "8_pre_mortality_cliniclevel.do".
* INPUT: "$work\analysis_sample.dta"
* OUTPUT: "$work\yder_year_mean_ses.dta"
*/
do "$do\3d_PostClosureSESMeans.do"


********************************************************************************
* 4. Summary Statistics
********************************************************************************


* Summary statistics on the patient level
** Table 1 Panel A
** Table D1 - patient level summary statistics
do "$do\4a_pt_sumstat.do"

* Summary statistics on the GP/doctor level
** Table 1 Panel B and C
do "$do\4b_gp_sumstat.do"

* Test for selection between GPs and patients
** Table 2
** Table D2
do "$do\4c_test_for_selection.do"

** Table D3
do "$do\4d_test_for_selection_conditions.do"

* Figure D2
do "$do\4e_fourlineplot.do"

* Figure 1 - Mortality gradient
do "$do\4f_sumstat_fig_gradient_mortality.do"

* Figure D1 - Health SES gradient
do "$do\4g_sumstat_fig_gradients.do"


********************************************************************************
* 5. Main analysis
********************************************************************************

* DDD regressions
** Table 3
** Table A9
** Table D4
** Table D5
do "$do\5a_main_results.do"

* Event figures
** Figure 2
** Figure 3
** Figure A1
do "$do\5b_event_figures.do"

* Detection vs. adherence
** Table D6
** Table D7
** Table D8
do "$do\5c_detection_vs_adherence.do"



********************************************************************************
* 6. Robustness Checks
********************************************************************************
* Robustness to the age restriction: expand to ages 30-80
* INPUT: "$work\analysis_sample5.dta"
* NOTE(review): none of the OUTPUT lists in this file mentions
* analysis_sample5.dta - confirm which step creates it.
* OUTPUT: Table A1
do "$do\6a_robustness_age.do"

* Test for robustness
* INPUT: "$work\analysis_sample.dta"
* OUTPUT: Table A2, Table A3, Table A4, Table A8 Panel A, Table A8 Panel B
do "$do\6b_robustness.do"


* Robustness check 6c (see the do-file name below)
* INPUT: "$work\analysis_sample.dta"
* OUTPUT: Table A5
do "$do\6c_robustness_SES_missing_incl.do"

* OUTPUT: Table A6, Table A7
do "$do\6d_soldclinic.do"


********************************************************************************
* 7. Mechanisms
********************************************************************************
* OUTPUT: Table C1
do "$do\7a_fragile_patients.do"

* External validation
** Table C2
do "$do\7b_general_concordance.do"


* PCP characteristics
* INPUT: "$work\analysis_sample.dta", "$work\mean_ses_yder.dta"
** Table D10
** Table D11
do "$do\7c_PCP_Characteristics.do"


* Parental illness
** Table D9
do "$do\7d_parental_illness.do"



********************************************************************************
* 8. Test for pre-trends in mortality
*** Appendix B
********************************************************************************

* Test for pre-trends in mortality
** Clinic level
do "$do\8_pre_mortality_cliniclevel.do"


** Individual level
do "$do\8_pre_mortality_idlevel.do"



********************************************************************************
* 9. Survey data mechanism
********************************************************************************
** The survey results were generated on a different server, since the survey data
** were only available there, and the main part of the paper was not included on
** that server. The results were generated by using the do-files from this project
** to create the population data with the relevant variables, and then merging
** them with the survey data provided, which had already been cleaned by the data providers.

do "$do\9_survey.do"

